/*
cd /projects/hsieh_project/proj_201809/code_2_201910/
qstata --dofile=out_mgrow2_2_mgrow.do --statatype=mp --cpucount=5 &

Calculate missing growth
*/

// ---- Session setup -----------------------------------------------------------
set linesize 255
capture log close

// Start from an empty session before changing to the projects root
clear all
capture noisily program drop _all
cd /projects/

display "Started at $S_TIME $S_DATE"

// Run-date stamp: today's date rendered as two-digit year, month, day
global rev_date: display %tdYYNNDD date("$S_DATE", "DMY")
display "${rev_date}"

// ---- Directory layout --------------------------------------------------------
global dir_proj "/projects/hsieh_project/proj_201809/"

global dir_do "${dir_proj}/201910_main/"
global dir_data "${dir_proj}/data/"
global dir_out "${dir_proj}/output/201910_main/"
global dir_outf "${dir_out}/city_mgrow2/"

// Create the output folders; errors (e.g. folder already exists) are captured
noisily capture mkdir ${dir_out}
capture mkdir ${dir_outf}
capture mkdir ${dir_outf}/data/

// ---- Run parameters ----------------------------------------------------------
// Geography definitions looped over below; each name is also a variable name
global gl_geo "msa fips czone msa1983 msa1983cz"

// Enter the missing-growth formulas below as (sigma - 1) and (rho - 1)
global sigma 3
global rho 2

// First and last year; both appear in the input dataset file name
global year1 1977
global year2 2013

//==============================================================================

// Create an empty accumulator dataset; each geography in the loop below
// appends its city-level results to this file
global ds_out ${dir_outf}/data/cmgrow_city
preserve
	clear
	save "${ds_out}", replace emptyok
restore

/*
--------------------------------------------------------------------------------
For each geography definition in $gl_geo:
  1) load the job-creation data and collapse employment by establishment type
     to the city x industry x year-pair level,
  2) merge the SV weights and compute missing growth per city-industry,
  3) aggregate weighted missing growth to the city level and append the result
     to ${ds_out} (cind_in = 1: incumbent industries only; cind_in = 2:
     incumbent and new industries).
*/
foreach i_city in $gl_geo {

	use "${dir_data}/jobcr_`i_city'_${year1}_${year2}", clear
	// Note the change in industry would be incumbent for def 4, for instance 1997 2002 city 2 ind 773
	gen city = `i_city' // Copy the geography variable into a common name
	gen emp = emp_s6 // Use definition 6

	/*
	----------------------------------------------------------------------------
	Generate a variable of employment for each type of establishment, then collapse
	*/
	gen emp_0 = emp // All est
	gen emp_3 = emp if inlist(cind_plant_stat, 3, 4) // Incumbent est
	gen emp_1 = emp if inlist(cind_plant_stat, 1, 5) // New est

	collapse (sum) emp emp_0 emp_3 emp_1, by(city ch_ind syear1 syear2 year)

	/*
	----------------------------------------------------------------------------
	Some additional operations on emp_*
	*/

	sort city ch_ind syear1 syear2 year
	// Number of years observed for each city-industry-year pair
	by city ch_ind syear1 syear2: gen n_year = _N
	tab n_year
	foreach i_var in emp_0 emp_3 emp_1 {

		// Replace 0 with missing (since collapse replaces missing with 0)
		replace `i_var' = . if `i_var' == 0

		// Generate forward variable: the end-year (syear2) value, spread to
		// every observation of the group
		by city ch_ind syear1 syear2: gen emp_y2 = `i_var' if year == syear2
		by city ch_ind syear1 syear2: egen f_`i_var' = mean(emp_y2)
		drop emp_y2

		// When only in the second year
		// (for new city-ind, we calculate missing growth using forward variable; see below)
		replace `i_var' = . if year == syear2 & n_year == 1
	}

	// Keep only the first observation (the earliest year, given the sort above)
	by city ch_ind syear1 syear2: keep if _n == 1
	drop year emp

	// Drop the full-span year pair. The bounds come from the same globals that
	// select the input file, so the filter cannot drift from the loaded data.
	drop if syear1 == ${year1} & syear2 == ${year2}

	// Merge with SV weight
	// NOTE(review): emp_cind, f_emp_cind, i_sato, w_sv, f_emp_city and emp_city_*
	// are not created in this file; presumably they come from this dataset - confirm
	merge 1:1 syear1 syear2 city ch_ind using "${dir_outf}/data/cmgrow_sv_`i_city'"
	// Check that calculations are the same (counts are only printed; nonzero
	// counts do not stop the run)
	count if emp_0 != emp_cind
	count if f_emp_0 != f_emp_cind

	/*
	----------------------------------------------------------------------------
	Calculate missing growth at city-ind level
	*/

	// Calculate employment shares of incumbent establishments
	gen emps_3 = emp_3 / emp_0
	gen f_emps_3 = f_emp_3 / f_emp_0

	// Calculate missing growth
	gen mgrow = .
	replace mgrow = ln(emps_3/f_emps_3) / (${sigma}-1) if i_sato == 1 // incumbent city-industry
	replace mgrow = ln(1/(1-f_emp_1/f_emp_city)) / (${rho}-1) if i_sato == 2 // new city-industry

	// Weighted contribution of each city-industry
	gen cmgrow = mgrow * w_sv
	save "${dir_outf}/data/cmgrow_cind_`i_city'", replace

	/*
	----------------------------------------------------------------------------
	Aggregate missing growth to the city level
	1) Keep only incumbent industries
	2) Keep both incumbent and new industries
	*/

	// 1) Incumbent industries only; preserve so the city-industry data can be
	//    reused for the second aggregate
	preserve
	keep if inlist(i_sato, 1)
	collapse (sum) cmgrow (first) emp_city_*, by(syear1 syear2 city)
	gen city_code = "`i_city'"
	gen cind_in = 1
	order city_code cind_in, first
	append using "${ds_out}"
	save "${ds_out}", replace
	restore

	// 2) Incumbent and new industries
	keep if inlist(i_sato, 1, 2)
	collapse (sum) cmgrow (first) emp_city_*, by(syear1 syear2 city)
	gen city_code = "`i_city'"
	gen cind_in = 2
	order city_code cind_in, first
	append using "${ds_out}"
	save "${ds_out}", replace
}

// Report when the run finished
display "Ended at $S_DATE $S_TIME"
// End of do file
